Adding some strings and geometry algorithms to notebook.
[andmenj-acm.git] / Mi manual de algoritmos / version_actual / src / strings / suffix_arrays.cpp
blobd726d60c7e4c0934af1f9af75088250cce13d7d6
1 ////////////////////////////////////////////////////////////////
2 // Begins Suffix Arrays implementation //
3 ////////////////////////////////////////////////////////////////
4 // O(n log n) - Manber and Myers algorithm //
5 // Refer to "Suffix arrays: A new method for on-line //
6 // string searches", by Udi Manber and Gene Myers //
7 ////////////////////////////////////////////////////////////////
8 //Usage:
9 // - Fill str with the characters of the string.
10 // - Call SuffixSort(n), where n is the length of the string
11 // stored in str.
12 // - That's it!
14 //Output:
15 // pos = The suffix array. Contains the n suffixes of str sorted
16 // in lexicographical order.
17 // Each suffix is represented as a single integer (the
18 // position of str where it starts).
19 // rank = The inverse of the suffix array. rank[i] = the index
20 // of the suffix str[i..n) in the pos array. (In other
21 // words, pos[i] = k <==> rank[k] = i)
22 // With this array, you can compare two suffixes in O(1):
23 // Suffix str[i..n) is smaller than str[j..n) if and
24 // only if rank[i] < rank[j]
26 const int N = 1000005; // Maximum size of input string
27 int str[N]; //input
28 int rank[N], pos[N]; //output
29 int cnt[N], next[N]; //internal
30 bool bh[N], b2h[N];
32 // Compares two suffixes according to their first characters
33 bool smaller_first_char(int a, int b){
34 return str[a] < str[b];
37 void suffixSort(int n){
38 //sort suffixes according to their first characters
39 for (int i=0; i<n; ++i){
40 pos[i] = i;
42 sort(pos, pos + n, smaller_first_char);
43 //{pos contains the list of suffixes sorted by their first
44 //character}
46 for (int i=0; i<n; ++i){
47 bh[i] = i == 0 || str[pos[i]] != str[pos[i-1]];
48 b2h[i] = false;
51 for (int h = 1; h < n; h <<= 1){
52 //{bh[i] == false if the first h characters of pos[i-1] ==
53 // the first h characters of pos[i]}
54 int buckets = 0;
55 for (int i=0, j; i < n; i = j){
56 j = i + 1;
57 while (j < n && !bh[j]) j++;
58 next[i] = j;
59 buckets++;
61 if (buckets == n) break; // We are done! Lucky bastards!
62 //{suffixes are separted in buckets containing strings
63 // starting with the same h characters}
65 for (int i = 0; i < n; i = next[i]){
66 cnt[i] = 0;
67 for (int j = i; j < next[i]; ++j){
68 rank[pos[j]] = i;
72 cnt[rank[n - h]]++;
73 b2h[rank[n - h]] = true;
74 for (int i = 0; i < n; i = next[i]){
75 for (int j = i; j < next[i]; ++j){
76 int s = pos[j] - h;
77 if (s >= 0){
78 int head = rank[s];
79 rank[s] = head + cnt[head]++;
80 b2h[rank[s]] = true;
83 for (int j = i; j < next[i]; ++j){
84 int s = pos[j] - h;
85 if (s >= 0 && b2h[rank[s]]){
86 for (int k = rank[s]+1; !bh[k] && b2h[k]; k++){
87 b2h[k] = false;
92 for (int i=0; i<n; ++i){
93 pos[rank[i]] = i;
94 bh[i] |= b2h[i];
97 for (int i=0; i<n; ++i){
98 rank[pos[i]] = i;
101 ////////////////////////////////////////////////////////////////
102 // End of Suffix Arrays implementation //
103 ////////////////////////////////////////////////////////////////
106 ////////////////////////////////////////////////////////////////
107 // Begin of Longest Common Prefix implementation //
108 ////////////////////////////////////////////////////////////////
109 // O(n) //
110 // Refer to "Linear-Time Longest-Common-Prefix Computation //
111 // in Suffix Arrays and Its Applications" by Toru Kasai, //
112 // Gunho Lee, Hiroki Arimura, Setsuo Arikawa, and Kunsoo Park.//
113 ////////////////////////////////////////////////////////////////
115 int height[N];
116 // - height[i] = length of the longest common prefix of suffix
117 // pos[i] and suffix pos[i-1]
118 // - height[0] = 0
119 void getHeight(int n){
120 for (int i=0; i<n; ++i) rank[pos[i]] = i;
121 height[0] = 0;
122 for (int i=0, h=0; i<n; ++i){
123 if (rank[i] > 0){
124 int j = pos[rank[i]-1];
125 while (i + h < n && j + h < n && str[i+h] == str[j+h]){
126 h++;
128 height[rank[i]] = h;
129 if (h > 0) h--;
133 ////////////////////////////////////////////////////////////////
134 // End of Longest Common Prefix implementation //
135 ////////////////////////////////////////////////////////////////